1. Read in Datasets

# generate a vector of file names
lists <- list.files("./brain-map-pilot")

# read in all the files
for (i in lists) {
  assign(i, read.table(paste0("./brain-map-pilot/",i)))
}

# generate a vector of unique brain areas
unq <- unique(substr(lists, 4, nchar(lists)-2))

# For loop to combine datasets in the same brain area
for (i in unq) {
  ind <- lists[grepl(i,lists)] # list datasets that include each unqiue brain area
  fa <- sum(substr(ind,1,2) == "FA")
  pm <- sum(substr(ind,1,2) == "PM")
  
  dat <- do.call(cbind,lapply(ind, get)) # cbind all the datasets
  dat.clean <- dat[,colnames(dat)!="V1"] # remove duplicated gene name columns
  
  rownames(dat.clean) <- dat[,1]
  colnames(dat.clean) <- c(paste0("FA_",seq(fa)), paste0("PM_",seq(pm))) # column names
  # colnames(dat.clean) <- c(rep("FA",fa), rep("PM",pm)) # column names

  sam <- data.frame(condition=c(rep("FA",fa), rep("PM",pm))) # generate sample data
  rownames(sam) <- colnames(dat.clean)
  
  assign(i, dat.clean) # assign the combined data into a dataset named after the area
  assign(paste0(i,"_sam"), sam) # assign sample data to unique name

  rm(sam, fa, pm, i,ind, dat, dat.clean) # remove all unnecessary objects
}

rm(lists, list=lists) # remove all unnecessary objects

# save.image(file="Brain6.Rdata")
# load("Brain6.Rdata")


2. Exploratory analysis

# generate a function to plot the cell coverage information
cellcov <- function(data, name) {
    pZero <- mean(data == 0)  # overall proportion of zero counts 
    libsize <- colSums(data)  # overall proportion of zero counts 
    
    # find cell coverage
    coverage <- colMeans(data > 0)
    cell_info <- data.frame(names = colnames(data), coverage = coverage)
    
    # plot cell coverage froms matrix cell_info
    ggplot(cell_info, aes(x = names, y = coverage)) + geom_point() + xlab("Cell Name") + ylab("Coverage") + theme_bw() + labs(title = paste0("Cell Coverage (", name, ")")) + theme(plot.title = element_text(hjust = 0.5, size = 15, face = "bold"))
}

# apply for each dataset
pls <- lapply(unq, function(i) cellcov(get(i), i))

# Put all plots in one grid
do.call(grid.arrange, c(pls, nrow = 2))


3. Filtering Low Expressed Genes using CPM

lowfilt <- function(data) {
    countdata <- data
    
    # Generate the CPM values and then filter.  Note that by converting to CPMs we are normalising for the different sequencing depths for each sample.
    myCPM <- cpm(countdata)
    
    # Which values in myCPM are greater than 0.5?
    threshold <- 0.5
    thresh <- myCPM > threshold
    # This produces a logical matrix with TRUEs and FALSEs head(thresh)
    
    # Summary of how many TRUEs there are in each row.
    table(rowSums(thresh))
    
    # we would like to keep genes that have at least 2 TRUES in each row of thresh
    keep <- rowSums(thresh) >= 1
    # Subset the rows of countdata to keep the more highly expressed genes
    counts.keep <- countdata[keep, ]
    summary(keep)
    
    return(counts.keep)
    # dim(counts.keep)
}

for (i in unq) {
    assign(i, lowfilt(get(i)))
}


4. Check distribution by Relative Log Expression plot and PCA plot

rle.pca <- function(data, name) {
    sam <- get(paste0(name, "_sam"))
    
    set <- newSeqExpressionSet(as.matrix(data), phenoData = sam)
    colors <- brewer.pal(ncol(data), "Set2")
    
    op <- par(mfrow = c(1, 2))
    plotRLE(set, outline = FALSE, ylim = c(-1.5, 1.5), col = colors[sam$condition])
    plotPCA(set, col = colors[sam$condition], cex = 0.5)
    mtext(name)

}

# apply for each dataset
invisible(lapply(unq, function(x) rle.pca(get(x), x)))


Distribution by Relative Log Expression plot & PCA plot


5. Normalization using upper-quartile UQ & Median normalization method

blnorm <- function(data, name, method) {
    sam <- get(paste0(name, "_sam"))
    
    set <- newSeqExpressionSet(as.matrix(data), phenoData = sam)
    colors <- brewer.pal(ncol(data), "Set2")
    
    set <- betweenLaneNormalization(set, which = method)
    
    if (method == "upper") {
        par(mfrow = c(1, 2))
        plotRLE(set, outline = FALSE, ylim = c(-1.5, 1.5), col = colors[sam$condition])
        plotPCA(set, col = colors[sam$condition], cex = 0.5)
        mtext(paste0("UQ - ", name))
    } else {
        par(mfrow = c(1, 2))
        plotRLE(set, outline = FALSE, ylim = c(-1.5, 1.5), col = colors[sam$condition])
        plotPCA(set, col = colors[sam$condition], cex = 0.5)
        mtext(paste0("Median - ", name))
    }
}


(1) Upper-quartile

# apply for each dataset
invisible(lapply(unq, function(x) blnorm(get(x), x, "upper")))


(2) Median

invisible(lapply(unq, function(x) blnorm(get(x), x, "median")))


6. Remove Unwanted Variance (RUVg)

(1) Empirical control genes

If no genes are known a priori not to be influenced by the covariates of interest, one can obtain a set of "in-silico empirical" negative controls, e.g., least significantly DE genes based on a first-pass DE analysis performed prior to RUVg normalization.
bln.set <- function(data, name) {
    sam <- get(paste0(name, "_sam"))
    
    set <- newSeqExpressionSet(as.matrix(data), phenoData = sam)
    set <- betweenLaneNormalization(set, which = "upper")
    return(set)
}

for (i in unq) {
    assign(paste0(i, "_set"), bln.set(get(i), i))
}


ecg <- function(data, name) {
    sam <- get(paste0(name, "_sam"))
    set <- get(paste0(name, "_set"))
    
    design <- model.matrix(~sam$condition, data = pData(set))
    y <- DGEList(counts = counts(set), group = sam$condition)
    y <- calcNormFactors(y, method = "upperquartile")
    y <- estimateGLMCommonDisp(y, design)
    y <- estimateGLMTagwiseDisp(y, design)
    fit <- glmFit(y, design)
    lrt <- glmLRT(fit, coef = 2)
    top <- topTags(lrt, n = nrow(set))$table
    
    empirical <- rownames(set)[which(!(rownames(set) %in% rownames(top)[1:5000]))]
    return(empirical)
}

for (i in unq) {
    assign(paste0(i, "_ecg"), ecg(get(i), i))
}

Here, we consider all but the top 5,000 genes as ranked by edgeR p-values.


(2) RUVg

ruv.g <- function(name) {
    set <- get(paste0(name, "_set"))
    empirical <- get(paste0(name, "_ecg"))
    
    set2 <- RUVg(set, empirical, k = 1)
    # pData(set2)
}

for (i in unq) {
    assign(paste0(i, "_set2"), ruv.g(i))
}

for (i in 1:length(unq)) {
    set2 <- get(paste0(unq[i], "_set2"))
    sam <- get(paste0(unq[i], "_sam"))
    colors <- brewer.pal(nrow(sam), "Set2")
    
    par(mfrow = c(1, 2))
    plotRLE(set2, outline = FALSE, ylim = c(-1, 1), col = colors[sam$condition])
    plotPCA(set2, col = colors[sam$condition], cex = 0.5)
    mtext(unq[i])
}


7. Differentially Expressed Gene Analysis

(1) DEG using edgeR package

edger.deg <- function(name) {
    require(edgeR)
    set2 <- get(paste0(name, "_set2"))
    sam <- get(paste0(name, "_set2"))
    
    design <- model.matrix(~condition + W_1, data = pData(set2))
    y <- DGEList(counts = counts(set2), group = sam$condition)
    
    y <- calcNormFactors(y, method = "upperquartile")
    y <- estimateGLMCommonDisp(y, design)
    y <- estimateGLMTagwiseDisp(y, design)
    
    logCPM <- cpm(y, log = T)
    
    fit <- glmFit(y, design)
    lrt <- glmLRT(fit, coef = 2)
    
    ruv.result <- lrt$table
    top <- topTags(lrt, n = nrow(set2))$table
    top$Gene <- rownames(top)
    top
}

for (i in unq) {
    assign(paste0(i, "_edge"), edger.deg(i))
}


(2) DEG using DESeq2 package

deseq.deg <- function(name) {
    set2 <- get(paste0(name, "_set2"))
    
    dds <- DESeqDataSetFromMatrix(countData = counts(set2), colData = pData(set2), design = ~condition + W_1)
    
    featureData <- data.frame(gene = rownames(set2))
    mcols(dds) <- DataFrame(mcols(dds), featureData)
    
    # keep <- rowSums(counts(dds)) >= 10 dds <- dds[keep,]
    
    dds$condition <- factor(dds$condition, levels = c("FA", "PM"))
    
    
    dds <- DESeq(dds)
    deseq.result <- results(dds, alpha = 0.999999)
    deseq.result <- data.frame(deseq.result)
    
    deseq.result$Gene <- rownames(deseq.result)
    deseq.result$logCPM <- aveLogCPM(counts(set2))
    deseq.result
}

for (i in unq) {
    suppressMessages(assign(paste0(i, "_deseq"), deseq.deg(i)))
}


# save(list=paste0(unq,c('_top','_deseq')), file = 'Brain6_DEG_result.Rdata') 
# load('Brain6_DEG_result.Rdata')


(3) Filter (abs(log2FC) > 1.0, abs(logFC) > 3.0, FDR < 0.05)

filt <- function(name, package, cpmthresh = 1, fcthresh = 1, fdrthresh = 0.05) {
    if (package == "edgeR") {
        top <- get(paste0(name, "_edge"))
        
        top <- top %>% filter(abs(logFC) > fcthresh, abs(logCPM) > cpmthresh, FDR < fdrthresh)
    } else {
        top <- get(paste0(name, "_deseq"))
        
        top <- top %>% filter(abs(log2FoldChange) > fcthresh, abs(logCPM) > cpmthresh, padj < fdrthresh)
    }
    
    top
}

for (i in unq) {
    assign(paste0(i, "_edge_filt_cpm3"), filt(i, "edgeR"))
    assign(paste0(i, "_edge_filt_cpm2"), filt(i, "edgeR", cpmthresh = 2))
    assign(paste0(i, "_edge_filt_cpm1"), filt(i, "edgeR", cpmthresh = 1))
    assign(paste0(i, "_deseq_filt_cpm3"), filt(i, "DESeq2"))
    assign(paste0(i, "_deseq_filt_cpm2"), filt(i, "DESEq2", cpmthresh = 2))
    assign(paste0(i, "_deseq_filt_cpm1"), filt(i, "DESEq2", cpmthresh = 1))
}

From now on, we will focus on genes with |logCPM| > 1.


(4) Volcano Plot

a. Based on log2FoldChange
volcanoplot <- function(res, lfcthresh = 2, sigthresh = 0.05, main = "Volcano Plot", legendpos = "bottomright", labelsig = TRUE, textcx = 1, package = "DESeq2", ...) {
    
    if (package == "DESeq2") {
        
        with(res, plot(log2FoldChange, -log10(pvalue), pch = 20, main = main, ...))
        with(subset(res, padj < sigthresh), points(log2FoldChange, -log10(pvalue), pch = 20, col = "red", ...))
        with(subset(res, abs(log2FoldChange) > lfcthresh), points(log2FoldChange, -log10(pvalue), pch = 20, col = "orange", ...))
        with(subset(res, padj < sigthresh & abs(log2FoldChange) > lfcthresh), points(log2FoldChange, -log10(pvalue), pch = 20, col = "green", ...))
        if (labelsig) {
            require(calibrate)
            with(subset(res, padj < sigthresh & abs(log2FoldChange) > lfcthresh), textxy(log2FoldChange, -log10(pvalue), labs = Gene, cex = textcx, ...))
        }
        legend(legendpos, xjust = 1, yjust = 1, legend = c(paste("FDR<", sigthresh, sep = ""), paste("|LogFC|>", lfcthresh, sep = ""), "both"), pch = 20, col = c("red", "orange", "green"))
        
    } else {
        
        with(res, plot(logFC, -log10(PValue), pch = 20, main = main, ...))
        with(subset(res, FDR < sigthresh), points(logFC, -log10(PValue), pch = 20, col = "red", ...))
        with(subset(res, abs(logFC) > lfcthresh), points(logFC, -log10(PValue), pch = 20, col = "orange", ...))
        with(subset(res, FDR < sigthresh & abs(logFC) > lfcthresh), points(logFC, -log10(PValue), pch = 20, col = "green", ...))
        if (labelsig) {
            require(calibrate)
            with(subset(res, FDR < sigthresh & abs(logFC) > lfcthresh), textxy(logFC, -log10(PValue), labs = Gene, cex = textcx, ...))
        }
        legend(legendpos, xjust = 1, yjust = 1, legend = c(paste("FDR<", sigthresh, sep = ""), paste("|LogFC|>", lfcthresh, sep = ""), "both"), pch = 20, col = c("red", "orange", "green"))
        
    }
    
}

par(mfrow = c(3, 2))
volcanoplot(Cerebellum_edge, package = "edgeR", main = "Volcano Plot edgeR - Cerebellum", xlim = c(-15, 15), ylim = c(-1, 320))
volcanoplot(Cortex_edge, package = "edgeR", main = "Volcano Plot edgeR - Cortex", xlim = c(-15, 15), ylim = c(-1, 320))
volcanoplot(Hippocampus_edge, package = "edgeR", main = "Volcano Plot edgeR - Hippocampus", xlim = c(-15, 15), ylim = c(-1, 320))
volcanoplot(Hypothalamus_edge, package = "edgeR", main = "Volcano Plot edgeR - Hypothalamus", xlim = c(-15, 15), ylim = c(-1, 320))
volcanoplot(RVLM_edge, package = "edgeR", main = "Volcano Plot edgeR - RVLM", xlim = c(-15, 15), ylim = c(-1, 320))
volcanoplot(Striatum_edge, package = "edgeR", main = "Volcano Plot edgeR - Striatum", xlim = c(-15, 15), ylim = c(-1, 320))

par(mfrow = c(3, 2))
volcanoplot(Cerebellum_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Cerebellum", xlim = c(-15, 15), ylim = c(-1, 270))
volcanoplot(Cortex_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Cortex", xlim = c(-15, 15), ylim = c(-1, 270))
volcanoplot(Hippocampus_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Hippocampus", xlim = c(-15, 15), ylim = c(-1, 270))
volcanoplot(Hypothalamus_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Hypothalamus", xlim = c(-15, 15), ylim = c(-1, 270))
volcanoplot(RVLM_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - RVLM", xlim = c(-15, 15), ylim = c(-1, 270))
volcanoplot(Striatum_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Striatum", xlim = c(-15, 15), ylim = c(-1, 270))


b. Based on logCPM
volcanoplot2 <- function(res, lfcthresh = 1, lcpmthresh = 1, sigthresh = 0.05, main = "Volcano Plot", legendpos = "bottomright", labelsig = TRUE, textcx = 0.8, package = "edgeR", ...) {
    if (package == "edgeR") {
        with(res, plot(logCPM, -log10(FDR), pch = 20, main = main, ...))
        with(subset(res, FDR < sigthresh), points(logCPM, -log10(FDR), pch = 20, col = "red", ...))
        with(subset(res, abs(logCPM) > lcpmthresh), points(logCPM, -log10(FDR), pch = 20, col = "orange", ...))
        with(subset(res, FDR < sigthresh & abs(logCPM) > lcpmthresh), points(logCPM, -log10(FDR), pch = 20, col = "green", ...))
        if (labelsig) {
            require(calibrate)
            with(subset(res, FDR < sigthresh & abs(logCPM) > lcpmthresh), textxy(logCPM, -log10(FDR), labs = Gene, cex = textcx, ...))
        }
        legend(legendpos, xjust = 1, yjust = 1, legend = c(paste("FDR<", sigthresh, sep = ""), paste("|LogCPM|>", lcpmthresh, sep = ""), "both"), pch = 20, col = c("red", "orange", "green"))
    } else if (package == "DESeq2") {
        with(res, plot(logCPM, -log10(padj), pch = 20, main = main, ...))
        with(subset(res, padj < sigthresh), points(logCPM, -log10(padj), pch = 20, col = "red", ...))
        with(subset(res, abs(logCPM) > lcpmthresh), points(logCPM, -log10(padj), pch = 20, col = "orange", ...))
        with(subset(res, padj < sigthresh & abs(logCPM) > lcpmthresh), points(logCPM, -log10(padj), pch = 20, col = "green", ...))
        if (labelsig) {
            require(calibrate)
            with(subset(res, padj < sigthresh & abs(logCPM) > lcpmthresh), textxy(logCPM, -log10(padj), labs = Gene, cex = textcx, ...))
        }
        legend(legendpos, xjust = 1, yjust = 1, legend = c(paste("FDR<", sigthresh, sep = ""), paste("|LogCPM|>", lcpmthresh, sep = ""), "both"), pch = 20, col = c("red", "orange", "green"))
    }
}

# par(mfrow = c(2, 3))
# volcanoplot2(Cerebellum_edge, package = "edgeR", main = "Volcano Plot edgeR - Cerebellum", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Cortex_edge, package = "edgeR", main = "Volcano Plot edgeR - Cortex", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Hippocampus_edge, package = "edgeR", main = "Volcano Plot edgeR - Hippocampus", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Hypothalamus_edge, package = "edgeR", main = "Volcano Plot edgeR - Hypothalamus", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(RVLM_edge, package = "edgeR", main = "Volcano Plot edgeR - RVLM", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Striatum_edge, package = "edgeR", main = "Volcano Plot edgeR - Striatum", xlim = c(-7, 15), ylim = c(-1, 10))
# 
# par(mfrow = c(2, 3))
# volcanoplot2(Cerebellum_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Cerebellum", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Cortex_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Cortex", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Hippocampus_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Hippocampus", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Hypothalamus_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Hypothalamus", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(RVLM_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - RVLM", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Striatum_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Striatum", xlim = c(-7, 15), ylim = c(-1, 10))
  • Cerebellum, Cortex, Hippocampus, Striatum은 EdgeR에서 더 gene detection이 잘 되고, Hypothalamus와 RVLM은 DESeq2에서 더 잘 되는 것을 볼 수 있습니다.
  • 전반적으로 EdgeR이 down-regulated 된 gene을 더 잘 detect하고 있습니다.
  • DESeq2 결과물은 up-과 down-regulated gene들의 차이가 매우 큽니다.


8. Differential Expression (DE) with logCPM = 1

(1) Why logCPM = 1?


The barplot above showed the number of genes expressed by logCPM value. Since we want to take a look at the genes as many as possible, we decided to use logCPM = 1 for further analysis.


(2) Summary of DE Analysis

deg.sum <- function(name, package= "edge", thresh = 1) {
  if (package=="edge") {
    top <- get(paste0(name,"_", package))
    topfilt <- get(paste0(name,"_", package, "_filt_cpm", thresh))
    ## entire dataset
    t.insig <- top[!top$Gene %in% topfilt$Gene,]
    
    ### Up-regulated in PM
    t.up <-  topfilt %>%
      dplyr::filter(logCPM > thresh, FDR< 0.05)
    
    t.down <- topfilt %>%
      dplyr::filter(logCPM < -thresh, FDR< 0.05)
    
    
    t <- data.frame(Insig = nrow(t.insig), 
                    Up = nrow(t.up), 
                    Down = nrow(t.down),
                    Total = nrow(t.insig) + nrow(t.up) + nrow(t.down)
    )
  } else {
    top <- get(paste0(name,"_", package))
    topfilt <- get(paste0(name,"_", package, "_filt_cpm", thresh))
    ## entire dataset
    t.insig <- top[!top$Gene %in% topfilt$Gene,]
    
    ### Up-regulated in PM
    t.up <-  topfilt %>%
      dplyr::filter(logCPM > thresh, padj < 0.05)
    
    t.down <- topfilt %>%
      dplyr::filter(logCPM < -thresh, padj < 0.05)
    
    
    t <- data.frame(Insig = nrow(t.insig), 
                    Up = nrow(t.up), 
                    Down = nrow(t.down),
                    Total = nrow(t.insig) + nrow(t.up) + nrow(t.down)
    )
  }

  t
}

res1_edge <- do.call(rbind,lapply(unq, function(i) deg.sum(i, thresh = 1)))
rownames(res1_edge) <- unq
res1_deseq <- do.call(rbind,lapply(unq, function(i) deg.sum(i, package="deseq",thresh = 1)))
rownames(res1_deseq) <- unq


a. Based on edgeR
knitr::kable(res1_edge, caption = "DE Analysis (using EdgeR package)")
DE Analysis (using EdgeR package)
Insig Up Down Total
Cerebellum 16241 32 63 16336
Cortex 16164 81 68 16313
Hippocampus 15799 256 108 16163
Hypothalamus 17844 12 16 17872
RVLM 18310 7 148 18465
Striatum 15835 783 296 16914


b. Based on DESeq2
knitr::kable(res1_deseq, caption = "DE Analysis (using DESeq2 package)")
DE Analysis (using DESeq2 package)
Insig Up Down Total
Cerebellum 16326 10 0 16336
Cortex 16272 41 0 16313
Hippocampus 16151 12 0 16163
Hypothalamus 15252 2422 198 17872
RVLM 18082 331 52 18465
Striatum 16769 145 0 16914


(2) Up-Down Plot

updownplot <- function(name, thresh = 1, package="edge") {
  if (package=="edge") {
    x <- get(paste0(name,"_edge_filt_cpm", thresh))
    t <- x[order(x$PValue),]
    if (any(t$logCPM < 0)) {
      barplot(t$logCPM, names.arg= t$Gene, horiz=T, las=1, col= ifelse(t$logCPM > 0, "darkred", "darkblue"), main = paste0(name, " logCPM (lowest to highest p-value)"));abline(v=0)
    } else {
      barplot(t$logCPM, names.arg= t$Gene, horiz=T, las=1, col="darkred", main = paste0(name, " logCPM (lowest to highest p-value)"))
    }    
  } else {
    x <- get(paste0(name,"_deseq_filt_cpm", thresh))
    t <- x[order(x$pvalue),]
    if (any(t$logCPM < 0)) {
      barplot(t$logCPM, names.arg= t$Gene, horiz=T, las=1, col= ifelse(t$logCPM > 0, "darkred", "darkblue"), main = paste0(name, " logCPM (lowest to highest p-value)"));abline(v=0)
    } else {
      barplot(t$logCPM, names.arg= t$Gene, horiz=T, las=1, col="darkred", main = paste0(name, " logCPM (lowest to highest p-value)"))
    }
  }
}


a. Based on edgeR
par(mfrow=c(2,3), mar=c(3,5,3,3))
updownplot("Cerebellum")
updownplot("Cortex")
updownplot("Hippocampus")
updownplot("Hypothalamus")
updownplot("RVLM")
updownplot("Striatum")


b. Based on DESeq2
par(mfrow=c(2,3), mar=c(3,5,3,3))
updownplot("Cerebellum", package="deseq")
updownplot("Cortex", package="deseq")
updownplot("Hippocampus", package="deseq")
updownplot("Hypothalamus", package="deseq")
updownplot("RVLM", package="deseq")
updownplot("Striatum", package="deseq")


(3) Function for generating gene lists

compare <- function(name) {
  a <- get(paste0(name,"_edge_filt_cpm1"))
  b <- get(paste0(name,"_deseq_filt_cpm1"))
  
  a <- a %>%
    mutate(updown = ifelse(logCPM > 0, "up","down"), package="edgeR") %>%
    arrange(desc(updown)) %>%
    dplyr::select(Gene, updown)
  
  b <- b %>%
    mutate(updown = ifelse(logCPM > 0, "up","down"), package="DESeq2") %>%
    arrange(desc(updown)) %>%
    dplyr::select(Gene, updown)


  both <- intersect(a$Gene, b$Gene)
  edge_only <- setdiff(a$Gene, b$Gene)
  deseq_only <- setdiff(b$Gene, a$Gene)

  both.df <- a[a$Gene %in% both,]
  both.df <- both.df %>% mutate(packageOnly="Both")
  edge_only.df <- a[a$Gene %in% edge_only,]
  edge_only.df <- edge_only.df %>% mutate(packageOnly="edgeR")
  deseq_only.df <- b[b$Gene %in% deseq_only,]
  deseq_only.df <- deseq_only.df %>% mutate(packageOnly="DESeq2")

  tab <- rbind(both.df, edge_only.df, deseq_only.df)
  # knitr::kable(tab)
  # a
  return(list(tab, name))
}

t <- lapply(unq, compare)


par(mfrow=c(2,3))
invisible(lapply(t, function(x)  { 
  edgeR <- ifelse(x[[1]]$packageOnly %in% c("Both", "edgeR"),1,0)
  DESeq2 <- ifelse(x[[1]]$packageOnly %in% c("Both", "DESeq2"),1,0)
  
  c <- cbind(edgeR, DESeq2)
  vennDiagram(c);mtext(x[[2]])
}))

# write.csv(t[[1]], file = "Cerebellum_genelist.csv") 
# write.csv(t[[2]], file = "Cortex_genelist.csv") 
# write.csv(t[[3]], file = "Hippocampus_genelist.csv") 
# write.csv(t[[4]], file = "Hypothalamus_genelist.csv") 
# write.csv(t[[5]], file = "RVLM_genelist.csv") 
# write.csv(t[[6]], file = "Striatum_genelist.csv") 


9. GO term analysis

Criteria:
    
- p-value cutoff: 0.15
- by package (2 options): `edgeR`, `DESeq2`
- by direction (2 options): Up, Down
- by GO Hierarchy 3 Sub-Ontologies: `BP (Biological Process)`, `MF (Molecular Function)`, `CC (Cellular Component)` 

Most of the genes had p-values less than 0.1 (or slightly above). Hence, we decided to use the p-value cutoff = 0.15.


goterm <- function(name, package="edge", thresh=1, direction = "up", pcut = 0.1, ontm="BP") {
    # universal
    t <- get(paste0(name,"_",package))
    universe_gene.df <- bitr(rownames(t), fromType = "SYMBOL",
                             toType = c("ENSEMBL","ENTREZID"),
                             OrgDb = org.Mm.eg.db)
    
    # upregulated, downregulated genes
    t_filt <- get(paste0(name, "_",package,"_filt_cpm",thresh))
    
    if (direction == "up") {
        up_gene.df <- t_filt %>%
            filter(logCPM > 0) %>% 
            dplyr::select(Gene, logCPM)
        
        up_bitr <- bitr(up_gene.df$Gene, fromType = "SYMBOL",
                        toType = c("ENSEMBL","ENTREZID"),
                        OrgDb = org.Mm.eg.db)
        # ego
        ego <- enrichGO(gene = up_bitr[,3],
                        universe   = universe_gene.df[,3],
                        OrgDb     = org.Mm.eg.db,
                        ont      = ontm,
                        pAdjustMethod = "BH",
                        pvalueCutoff = pcut,
                        qvalueCutoff = 0.1,
                        readable   = TRUE)
    } else {
        down_gene.df <- t_filt %>% filter(logCPM < 0) %>% dplyr::select(Gene, logCPM)
        down_bitr <- bitr(down_gene.df$Gene, fromType = "SYMBOL",
                          toType = c("ENSEMBL","ENTREZID"),
                          OrgDb = org.Mm.eg.db)
        
        # ego
        ego <- enrichGO(gene = down_bitr[,3],
                        universe   = universe_gene.df[,3],
                        OrgDb     = org.Mm.eg.db,
                        ont      = ontm,
                        pAdjustMethod = "BH",
                        pvalueCutoff = pcut,
                        qvalueCutoff = 0.1,
                        readable   = TRUE)
    }
    
    ego
}


(1) Based on edgeR

a. BP
- Up
ego_list_up_edge.15 <- lapply(unq, function(x) goterm(x, direction = "up", pcut = 0.15))
dotplot(ego_list_up_edge.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(edgeR, Up, p<0.15, BP)")

dotplot(ego_list_up_edge.15[[2]], orderBy = "p.adjust", title="Cortex\n(edgeR, Up, p<0.15, BP)")

dotplot(ego_list_up_edge.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(edgeR, Up, p<0.15, BP)")

dotplot(ego_list_up_edge.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(edgeR, Up, p<0.15, BP)")

dotplot(ego_list_up_edge.15[[5]], orderBy = "p.adjust", title="RVLM\n(edgeR, Up, p<0.15, BP)")

dotplot(ego_list_up_edge.15[[6]], orderBy = "p.adjust", title="Striatum\n(edgeR, Up, p<0.15, BP)")


- Down
ego_list_down_edge.15 <- lapply(unq, function(x) goterm(x, direction = "down", pcut = 0.15))
dotplot(ego_list_down_edge.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(edgeR, Down, p<0.15, BP)")

dotplot(ego_list_down_edge.15[[2]], orderBy = "p.adjust", title="Cortex\n(edgeR, Down, p<0.15, BP)")

dotplot(ego_list_down_edge.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(edgeR, Down, p<0.15, BP)")

dotplot(ego_list_down_edge.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(edgeR, Down, p<0.15, BP)")

dotplot(ego_list_down_edge.15[[5]], orderBy = "p.adjust", title="RVLM\n(edgeR, Down, p<0.15, BP)")

dotplot(ego_list_down_edge.15[[6]], orderBy = "p.adjust", title="Striatum\n(edgeR, Down, p<0.15, BP)")


b. MF
- Up
ego_list_up_edge.15 <- lapply(unq, function(x) goterm(x, direction = "up", pcut = 0.15, ontm="MF"))
dotplot(ego_list_up_edge.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(edgeR, Up, p<0.15, MF)")

dotplot(ego_list_up_edge.15[[2]], orderBy = "p.adjust", title="Cortex\n(edgeR, Up, p<0.15, MF)")

dotplot(ego_list_up_edge.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(edgeR, Up, p<0.15, MF)")

dotplot(ego_list_up_edge.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(edgeR, Up, p<0.15, MF)")

dotplot(ego_list_up_edge.15[[5]], orderBy = "p.adjust", title="RVLM\n(edgeR, Up, p<0.15, MF)")

dotplot(ego_list_up_edge.15[[6]], orderBy = "p.adjust", title="Striatum\n(edgeR, Up, p<0.15, MF)")


- Down
ego_list_down_edge.15 <- lapply(unq, function(x) goterm(x, direction = "down", pcut = 0.15, ontm="MF"))
dotplot(ego_list_down_edge.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(edgeR, Down, p<0.15, MF)")

dotplot(ego_list_down_edge.15[[2]], orderBy = "p.adjust", title="Cortex\n(edgeR, Down, p<0.15, MF)")

dotplot(ego_list_down_edge.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(edgeR, Down, p<0.15, MF)")

dotplot(ego_list_down_edge.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(edgeR, Down, p<0.15, MF)")

dotplot(ego_list_down_edge.15[[5]], orderBy = "p.adjust", title="RVLM\n(edgeR, Down, p<0.15, MF)")

dotplot(ego_list_down_edge.15[[6]], orderBy = "p.adjust", title="Striatum\n(edgeR, Down, p<0.15, MF)")


c. CC
- Up
ego_list_up_edge.15 <- lapply(unq, function(x) goterm(x, direction = "up", pcut = 0.15, ontm="CC"))
dotplot(ego_list_up_edge.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(edgeR, Up, p<0.15, CC)")

dotplot(ego_list_up_edge.15[[2]], orderBy = "p.adjust", title="Cortex\n(edgeR, Up, p<0.15, CC)")

dotplot(ego_list_up_edge.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(edgeR, Up, p<0.15, CC)")

dotplot(ego_list_up_edge.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(edgeR, Up, p<0.15, CC)")

dotplot(ego_list_up_edge.15[[5]], orderBy = "p.adjust", title="RVLM\n(edgeR, Up, p<0.15, CC)")

dotplot(ego_list_up_edge.15[[6]], orderBy = "p.adjust", title="Striatum\n(edgeR, Up, p<0.15, CC)")


- Down
ego_list_down_edge.15 <- lapply(unq, function(x) goterm(x, direction = "down", pcut = 0.15, ontm="CC"))
dotplot(ego_list_down_edge.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(edgeR, Down, p<0.15, CC)")

dotplot(ego_list_down_edge.15[[2]], orderBy = "p.adjust", title="Cortex\n(edgeR, Down, p<0.15, CC)")

dotplot(ego_list_down_edge.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(edgeR, Down, p<0.15, CC)")

dotplot(ego_list_down_edge.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(edgeR, Down, p<0.15, CC)")

dotplot(ego_list_down_edge.15[[5]], orderBy = "p.adjust", title="RVLM\n(edgeR, Down, p<0.15, CC)")

dotplot(ego_list_down_edge.15[[6]], orderBy = "p.adjust", title="Striatum\n(edgeR, Down, p<0.15, CC)")


(2) Based on DESeq2

a. BP
- Up
ego_list_up_deseq.15 <- lapply(unq, function(x) goterm(x, package="deseq", direction = "up", pcut = 0.15))
dotplot(ego_list_up_deseq.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(DESeq2, Up, p<0.15, BP)")

dotplot(ego_list_up_deseq.15[[2]], orderBy = "p.adjust", title="Cortex\n(DESeq2, Up, p<0.15, BP)")

dotplot(ego_list_up_deseq.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(DESeq2, Up, p<0.15, BP)")

dotplot(ego_list_up_deseq.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(DESeq2, Up, p<0.15, BP)")

dotplot(ego_list_up_deseq.15[[5]], orderBy = "p.adjust", title="RVLM\n(DESeq2, Up, p<0.15, BP)")

dotplot(ego_list_up_deseq.15[[6]], orderBy = "p.adjust", title="Striatum\n(DESeq2, Up, p<0.15, BP)")


- Down
ego_list_down_deseq.15 <- lapply(unq, function(x) goterm(x, package="deseq", direction = "down", pcut = 0.15))
# dotplot(ego_list_down_deseq.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(DESeq2, Down, p<0.15, BP)")
# dotplot(ego_list_down_deseq.15[[2]], orderBy = "p.adjust", title="Cortex\n(DESeq2, Down, p<0.15, BP)")
# dotplot(ego_list_down_deseq.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(DESeq2, Down, p<0.15, BP)")
dotplot(ego_list_down_deseq.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(DESeq2, Down, p<0.15, BP)")

dotplot(ego_list_down_deseq.15[[5]], orderBy = "p.adjust", title="RVLM\n(DESeq2, Down, p<0.15, BP)")

# dotplot(ego_list_down_deseq.15[[6]], orderBy = "p.adjust", title="Striatum\n(DESeq2, Down, p<0.15, BP)")


b. MF
- Up
ego_list_up_deseq.15 <- lapply(unq, function(x) goterm(x, package="deseq", direction = "up", pcut = 0.15, ontm="MF"))
dotplot(ego_list_up_deseq.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(DESeq2, Up, p<0.15, MF)")

dotplot(ego_list_up_deseq.15[[2]], orderBy = "p.adjust", title="Cortex\n(DESeq2, Up, p<0.15, MF)")

dotplot(ego_list_up_deseq.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(DESeq2, Up, p<0.15, MF)")

dotplot(ego_list_up_deseq.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(DESeq2, Up, p<0.15, MF)")

dotplot(ego_list_up_deseq.15[[5]], orderBy = "p.adjust", title="RVLM\n(DESeq2, Up, p<0.15, MF)")

dotplot(ego_list_up_deseq.15[[6]], orderBy = "p.adjust", title="Striatum\n(DESeq2, Up, p<0.15, MF)")


- Down
ego_list_down_deseq.15 <- lapply(unq, function(x) goterm(x, package="deseq", direction = "down", pcut = 0.15, ontm="MF"))
# dotplot(ego_list_down_deseq.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(DESeq2, Down, p<0.15, MF)")
# dotplot(ego_list_down_deseq.15[[2]], orderBy = "p.adjust", title="Cortex\n(DESeq2, Down, p<0.15, MF)")
# dotplot(ego_list_down_deseq.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(DESeq2, Down, p<0.15, MF)")
dotplot(ego_list_down_deseq.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(DESeq2, Down, p<0.15, MF)")

dotplot(ego_list_down_deseq.15[[5]], orderBy = "p.adjust", title="RVLM\n(DESeq2, Down, p<0.15, MF)")

# dotplot(ego_list_down_deseq.15[[6]], orderBy = "p.adjust", title="Striatum\n(DESeq2, Down, p<0.15, MF)")


c. CC
- Up
ego_list_up_deseq.15 <- lapply(unq, function(x) goterm(x, package="deseq", direction = "up", pcut = 0.15, ontm="CC"))
dotplot(ego_list_up_deseq.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(DESeq2, Up, p<0.15, CC)")

dotplot(ego_list_up_deseq.15[[2]], orderBy = "p.adjust", title="Cortex\n(DESeq2, Up, p<0.15, CC)")

dotplot(ego_list_up_deseq.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(DESeq2, Up, p<0.15, CC)")

dotplot(ego_list_up_deseq.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(DESeq2, Up, p<0.15, CC)")

dotplot(ego_list_up_deseq.15[[5]], orderBy = "p.adjust", title="RVLM\n(DESeq2, Up, p<0.15, CC)")

dotplot(ego_list_up_deseq.15[[6]], orderBy = "p.adjust", title="Striatum\n(DESeq2, Up, p<0.15, CC)")


- Down
ego_list_down_deseq.15 <- lapply(unq, function(x) goterm(x, package="deseq", direction = "down", pcut = 0.15, ontm="CC"))
# dotplot(ego_list_down_deseq.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(DESeq2, Down, p<0.15, CC)")
# dotplot(ego_list_down_deseq.15[[2]], orderBy = "p.adjust", title="Cortex\n(DESeq2, Down, p<0.15, CC)")
# dotplot(ego_list_down_deseq.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(DESeq2, Down, p<0.15, CC)")
dotplot(ego_list_down_deseq.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(DESeq2, Down, p<0.15, CC)")

dotplot(ego_list_down_deseq.15[[5]], orderBy = "p.adjust", title="RVLM\n(DESeq2, Down, p<0.15, CC)")

# dotplot(ego_list_down_deseq.15[[6]], orderBy = "p.adjust", title="Striatum\n(DESeq2, Down, p<0.15, CC)")


End of Document